require(nlme)
require(reshape2)
require(ggplot2)
require(multcomp)
require(emmeans)
require(rstatix)
require(ggpubr)
require(RColorBrewer)
require(tidyverse)
require(dplyr)
require(ggbeeswarm)
require(readxl)
require(broom)
require(scales)
require(RCurl)
require(plotly)
require(ggrepel)
require(gridExtra)
require(ggExtra)
require(DT)
source("./HelperFunctions.R")
theme_point<-theme_bw()+theme(strip.background = element_blank())
theme_bar<-theme_bw()+theme(axis.text.x = element_text(angle=90, vjust=0.5, hjust=1),strip.background = element_blank(),axis.ticks.x = element_blank(),axis.line.x = element_blank())
theme_boxplot<-theme_classic()+theme(axis.text.x = element_text(angle=90, vjust=0.5, hjust=1),strip.background = element_blank(),axis.ticks.x = element_blank(),axis.title.x = element_blank(),axis.line.x = element_blank(),legend.position = "none")
color_panel<-c("#e35d6a","#5bb75b","#428bca","#e87810","#23496b","#ffbf00","#cc2028","#039748","pink","darkgray")
names(color_panel)<-sort(c("serum","Biomatrica","EDTA separator","EDTA","Citrate","RNA Streck","Roche","DNA Streck", "PAXgene", "ACD-A"))
set.seed(1)#Introduction
For every metric used to compare kit-tube combinations, we tested the possibility of interaction. We build a linear mixed-effects model with tube, RNA isolation kit and timelapse as fixed effects and donorID as random effect.
Sample annotation with info about tube, kit, used input volume, eluate volume etc., volume unit is µL
#Annotation
sample_annotation<-read_tsv("./Annotation_exRNAQC017.csv")
colnames(sample_annotation)[23]<-'TimeInterval'
sample_annotation<-sample_annotation %>% mutate(donorID= case_when((Donor == "PNL-XNID") ~ "D5",
(Donor == "PNL-7DEN") ~ "D1",
(Donor == "PNL-8ZI1") ~ "D2",
(Donor == "PNL-NLID") ~ "D3",
(Donor == "PNL-UCH7") ~ "D4"),
SampleID= paste0(GraphKit,"_",GraphTube,"_",TimeInterval,"_",donorID),
tube_kitID=paste0(GraphTube,"_",GraphKit),
tube_kit_timeID=paste0(GraphTube,"_",GraphKit,"_",TimeInterval),
PlasmaInputVml= PlasmaInputV/1000)
sample_annotation$Tube<-as.factor(sample_annotation$Tube)
sample_annotation$RNAisolation<-as.factor(sample_annotation$RNAisolation)
sample_annotation$TimeInterval<-as.factor(sample_annotation$TimeInterval)
sample_annotation$donorID<-as.factor(sample_annotation$donorID)
#Reads
miRNAs <- data.table::fread("mergedTxts/allmiRs_subs.txt", header=T, data.table = F)
spikes <- data.table::fread("mergedTxts/allspikes_subs.txt", header=T, data.table = F)
reads <- data.table::fread("mergedTxts/allreads_subs.txt", header=T, data.table = F)
contam <- data.table::fread("mergedTxts/allcontam_subs.txt", header=T, data.table = F)
reads_total<- read_tsv("./smallRNA_table.tsv")
reads_total <- full_join(reads_total, sample_annotation, by = "RNAID")
Tube.levels<-levels(reads_total$Tube)
RNAisolation.levels<-levels(reads_total$RNAisolation)
TimeInterval.levels<-levels(reads_total$TimeInterval)
#ggplot(reads_total, aes(x = TimeInterval, y = total_reads, group = donorID:RNAisolation)) +
# geom_line(aes(color=RNAisolation))+geom_point() + facet_grid(. ~ Tube) + labs(title = "Raw Number of Reads")
spikes_sum <- spikes %>% gather(key="UniqueID",value="spikecounts",-"spikeID") %>%
group_by(UniqueID) %>% dplyr::summarise(counts=sum(spikecounts, na.rm=T)) %>%
#spread(key=UniqueID,value=spikesum) %>%
mutate(reads="spikes")
# add sum of spikes to the total mapped reads (these were not considered as "mapped" yet)
reads_complete <- reads %>% gather(key="UniqueID",value="counts",-"reads") %>%
rbind(., spikes_sum) %>%
spread(key=reads, value=counts) %>%
mutate(all_mapped = mapped + spikes) %>%
gather(key="reads",value="counts",-"UniqueID")
DT::datatable(sample_annotation %>% dplyr::select(c("RNAID","RNAisolation","PlasmaInputV","donorID","Tube","TimeInterval","Sequinconc","ERCCconc",Abbrevation="GraphKit")), rownames = TRUE, filter="top", options = list(pageLength = 10, scrollX=T), caption = "Sample annotation table")calculation: count mapped miRNAS / count mapped LP spikes
gene_level_ratios <- rbind(
reads %>%
filter(reads=="mapped_miR") %>%
mutate(type="endogenous") %>%
group_by(type) %>%
dplyr::summarise_if(is.numeric, sum, na.rm=TRUE),spikes %>%
mutate(type=gsub(".-..$","",spikeID)) %>%
group_by(type) %>%
dplyr::summarise_if(is.numeric, sum, na.rm=TRUE)
) %>%
gather(., key="UniqueID",value="counts",-type) %>% #long format
spread(., key = "type", value="counts") %>%
mutate("LPvsEndo"=LP/endogenous, "RCvsEndo"=RC/endogenous, "EndovsRC"=endogenous/RC, "EndovsLP"= endogenous/LP) %>%
left_join(., sample_annotation %>%
dplyr::select(c("UniqueID","RNAID","RNAisolation","Tube","SampleID","GraphTube","EluateV","PlasmaInputV", "BiologicalReplicate", "TimeInterval","Donor", "donorID")), by=c("UniqueID" = "RNAID")) #add annotationgene_level_ratios <- gene_level_ratios %>%
mutate("EndovsLP_InputCorr"= (EndovsLP)*EluateV/PlasmaInputV)
Tube.levels<-levels(gene_level_ratios$Tube)
RNAisolation.levels<-levels(gene_level_ratios$RNAisolation)
TimeInterval.levels<-levels(gene_level_ratios$TimeInterval)Linear mixed-effects model with crossed fixed effects of tube, kit and TimeInterval.
EndovsLP_m1<-lme(EndovsLP~Tube*RNAisolation*TimeInterval,
random=~1|donorID,
weights = varIdent(form = ~ 1 | RNAisolation),
data=gene_level_ratios)
anova(EndovsLP_m1)EndovsLP_m1<-lme(EndovsLP~Tube*RNAisolation*TimeInterval,
random=~1|donorID,
data=gene_level_ratios)
anova(EndovsLP_m1)Only Tube, and the interactions Tube:RNAisolation and Tube:TimeInterval are significant. There are no significant three-way interactions.
Next, we look at second-order interactions:
EndovsLP_m2<-lme(EndovsLP~(Tube+RNAisolation+TimeInterval)^2,
random=~1|donorID,
data=gene_level_ratios)
anova(EndovsLP_m2)EndovsLP_m3<-lme(EndovsLP~Tube+RNAisolation+TimeInterval+Tube:RNAisolation+Tube:TimeInterval,
random=~1|donorID,
data=gene_level_ratios)
anova(EndovsLP_m3)checking normality of residuals
qqnorm(EndovsLP_m3$residuals)
qqline(EndovsLP_m3$residuals)For all tubes, the effect of time is investigated. The data is collapsed over RNA isolation method.
EndovsLP_m.time<-emmeans(EndovsLP_m3, ~TimeInterval|Tube)
plot(EndovsLP_m.time, comparisons=TRUE)pairs(EndovsLP_m.time)## Tube = Citrate:
## contrast estimate SE df t.ratio p.value
## T0 - T04 -6.34 33.1 74 -0.191 0.9800
## T0 - T16 -11.16 33.1 74 -0.337 0.9395
## T04 - T16 -4.82 33.1 74 -0.145 0.9884
##
## Tube = EDTA:
## contrast estimate SE df t.ratio p.value
## T0 - T04 19.18 33.1 74 0.579 0.8318
## T0 - T16 -113.14 33.1 74 -3.414 0.0030
## T04 - T16 -132.33 33.1 74 -3.993 0.0004
##
## Tube = serum:
## contrast estimate SE df t.ratio p.value
## T0 - T04 24.20 33.1 74 0.730 0.7464
## T0 - T16 58.03 33.1 74 1.751 0.1933
## T04 - T16 33.83 33.1 74 1.021 0.5660
##
## Results are averaged over the levels of: RNAisolation
## Degrees-of-freedom method: containment
## P value adjustment: tukey method for comparing a family of 3 estimates
Now, for both isolation kits the effect of tube is investigated
EndovsLP_m.method<-emmeans(EndovsLP_m3, ~Tube|RNAisolation)
plot(EndovsLP_m.method, comparisons=TRUE)pairs(EndovsLP_m.method)## RNAisolation = Maxwell:
## contrast estimate SE df t.ratio p.value
## Citrate - EDTA -85.1 27.1 74 -3.144 0.0067
## Citrate - serum -14.2 27.1 74 -0.526 0.8587
## EDTA - serum 70.8 27.1 74 2.617 0.0286
##
## RNAisolation = miRNeasySPAkit:
## contrast estimate SE df t.ratio p.value
## Citrate - EDTA -94.0 27.1 74 -3.473 0.0025
## Citrate - serum -109.0 27.1 74 -4.027 0.0004
## EDTA - serum -15.0 27.1 74 -0.554 0.8447
##
## Results are averaged over the levels of: TimeInterval
## Degrees-of-freedom method: containment
## P value adjustment: tukey method for comparing a family of 3 estimates
calculation: count mapped miRNAS / count mapped RC spikes
gene_level_ratios$GraphTube2<- paste(gene_level_ratios$Tube, gene_level_ratios$TimeInterval, sep = "_")
test <- log_rescaling_CI(gene_level_ratios, measurevar="EndovsRC", groupvar=c("GraphTube2"))
# Plot LP/endo in log10 scale
spikes2 <- ggplot(test, aes(x=GraphTube2, y=10^measurevar_log_resc)) +
#geom_bar(position=position_dodge(), stat="identity")+
geom_errorbar(aes(ymin=ci_lower_oriscale, ymax=ci_upper_oriscale), colour="grey", width=.1) +
geom_point(aes(colour=Tube), size=1.2) +
geom_point(data=test, aes(x=GraphTube2, y=mean_oriscale), shape=4, colour="grey") +
geom_hline(yintercept = 1, linetype="dashed",colour="grey88") +
facet_wrap(~RNAisolation, scales = "free_x", ncol = 2)+
labs(x="", y="relative small RNA conc. (rescaled)", title=NULL, col = NULL, fill = NULL) +
scale_colour_manual(values=color_panel) +
scale_y_log10() +
theme_bar
ggplotly(spikes2)Linear mixed-effects model with crossed fixed effects of tube, kit and timelapse.
EndovsRC_m1<-lme(EndovsRC~Tube*RNAisolation*TimeInterval,
random=~1|donorID,
data=gene_level_ratios)
anova(EndovsRC_m1)anova_EndovsRC_m1 <- round(anova(EndovsRC_m1)[c(5, 6, 7), c(4)], digits = 3)Second order interactions
EndovsRC_m2<-lme(EndovsRC~(Tube+RNAisolation+TimeInterval)^2,
random=~1|donorID,
data=gene_level_ratios)
anova(EndovsRC_m2)Only significant interactions
EndovsRC_m3<-lme(EndovsRC~Tube+RNAisolation+TimeInterval+Tube:TimeInterval,
random=~1|donorID,
data=gene_level_ratios)
anova(EndovsRC_m3)For all tubes, the effect of time is investigated. The data is collapsed over RNA isolation method.
EndovsRC.time<-emmeans(EndovsRC_m3, ~TimeInterval|Tube)
plot(EndovsRC.time, comparisons=TRUE)pairs(EndovsRC.time)## Tube = Citrate:
## contrast estimate SE df t.ratio p.value
## T0 - T04 -17.647 18.1 76 -0.976 0.5945
## T0 - T16 -18.139 18.1 76 -1.003 0.5774
## T04 - T16 -0.493 18.1 76 -0.027 0.9996
##
## Tube = EDTA:
## contrast estimate SE df t.ratio p.value
## T0 - T04 13.113 18.1 76 0.725 0.7495
## T0 - T16 -51.912 18.1 76 -2.870 0.0145
## T04 - T16 -65.025 18.1 76 -3.595 0.0016
##
## Tube = serum:
## contrast estimate SE df t.ratio p.value
## T0 - T04 16.067 18.1 76 0.888 0.6493
## T0 - T16 18.463 18.1 76 1.021 0.5662
## T04 - T16 2.396 18.1 76 0.132 0.9904
##
## Results are averaged over the levels of: RNAisolation
## Degrees-of-freedom method: containment
## P value adjustment: tukey method for comparing a family of 3 estimates
EDTA has a significantly higher smallRNA concentration in plasma at time T16.
Calculation: Endo/LP * EluateV/PlasmaInputV
test <- log_rescaling_CI(gene_level_ratios, measurevar="EndovsLP_InputCorr", groupvar=c("GraphTube2"))
# Plot LP/endo in log10 scale
spikes_conc <- ggplot(test, aes(x=GraphTube2, y=10^measurevar_log_resc)) +
#geom_bar(position=position_dodge(), stat="identity")+
facet_wrap(~RNAisolation, scales = "free_x", ncol = 2)+
geom_errorbar(aes(ymin=ci_lower_oriscale, ymax=ci_upper_oriscale), colour="grey", width=.1) +
geom_point(aes(colour=Tube), size=1.2) +
geom_point(data=test, aes(x=GraphTube2, y=mean_oriscale), shape=4, colour="grey") +
geom_hline(yintercept = 1, linetype="dashed",colour="grey88") +
labs(x="", y="relative endogenous small RNA concentration", title=NULL, subtitle="endogenous small RNA vs LP (rescaled)", col = NULL) +
scale_colour_manual(values=color_panel) +
scale_y_log10() +
theme_bar
ggplotly(spikes_conc)Linear mixed-effects model with crossed fixed effects of tube, kit and timelapse.
EndovsLP_InputCorr_m1<-lme(EndovsLP_InputCorr~Tube*RNAisolation*TimeInterval,
random=~1|donorID,
data=gene_level_ratios)
anova(EndovsLP_InputCorr_m1)anova_EndovsLP_InputCorr_m1 <- round(anova(EndovsLP_InputCorr_m1)[c(5, 6, 7), c(4)], digits = 3)Second order interactions
EndovsLP_InputCorr_m2<-lme(EndovsLP_InputCorr~(Tube+RNAisolation+TimeInterval)^2,
random=~1|donorID,
data=gene_level_ratios)
anova(EndovsLP_InputCorr_m2)keeping only significant interactions
EndovsLP_InputCorr_m3<-lme(EndovsLP_InputCorr~Tube+RNAisolation+TimeInterval+Tube:RNAisolation+Tube:TimeInterval,
random= ~1|donorID,
data=gene_level_ratios)
anova(EndovsLP_InputCorr_m3)Checking normality of residuals
qqnorm(EndovsLP_InputCorr_m3$residuals)
qqline(EndovsLP_InputCorr_m3$residuals)For all tubes, the effect of time is investigated. The data is collapsed over RNA isolation method.
EndovsLP_InputCorr_m.time<-emmeans(EndovsLP_InputCorr_m3, ~TimeInterval|Tube)
plot(EndovsLP_InputCorr_m.time, comparisons=TRUE)pairs(EndovsLP_InputCorr_m.time)## Tube = Citrate:
## contrast estimate SE df t.ratio p.value
## T0 - T04 -0.656 1.7 74 -0.386 0.9212
## T0 - T16 -1.040 1.7 74 -0.612 0.8138
## T04 - T16 -0.384 1.7 74 -0.226 0.9722
##
## Tube = EDTA:
## contrast estimate SE df t.ratio p.value
## T0 - T04 1.602 1.7 74 0.943 0.6149
## T0 - T16 -6.004 1.7 74 -3.535 0.0020
## T04 - T16 -7.606 1.7 74 -4.478 0.0001
##
## Tube = serum:
## contrast estimate SE df t.ratio p.value
## T0 - T04 1.407 1.7 74 0.828 0.6866
## T0 - T16 2.703 1.7 74 1.591 0.2559
## T04 - T16 1.295 1.7 74 0.763 0.7270
##
## Results are averaged over the levels of: RNAisolation
## Degrees-of-freedom method: containment
## P value adjustment: tukey method for comparing a family of 3 estimates
For both isolation kits, the effect of tube is investigated.
EndovsLP_InputCorr_m.method<-emmeans(EndovsLP_InputCorr_m3, ~Tube|RNAisolation)
plot(EndovsLP_InputCorr_m.method, comparisons=TRUE)pairs(EndovsLP_InputCorr_m.method)## RNAisolation = Maxwell:
## contrast estimate SE df t.ratio p.value
## Citrate - EDTA -8.51 1.39 74 -6.133 <.0001
## Citrate - serum -1.42 1.39 74 -1.027 0.5624
## EDTA - serum 7.08 1.39 74 5.106 <.0001
##
## RNAisolation = miRNeasySPAkit:
## contrast estimate SE df t.ratio p.value
## Citrate - EDTA -3.13 1.39 74 -2.258 0.0682
## Citrate - serum -3.63 1.39 74 -2.619 0.0285
## EDTA - serum -0.50 1.39 74 -0.360 0.9310
##
## Results are averaged over the levels of: TimeInterval
## Degrees-of-freedom method: containment
## P value adjustment: tukey method for comparing a family of 3 estimates
number of miRNA with counts > 6
cutoff_kit <- data.frame(Tube = sample_annotation$Tube, GraphTube = sample_annotation$GraphTube, median_th = 3)miRNAs_long <- miRNAs %>% gather(., key="UniqueID", value="est_counts", -MIMATID) %>% #long format
left_join(., dplyr::select(sample_annotation,c(UniqueID,RNAID,RNAisolation,GraphTube,GraphKit, SampleID, Tube, BiologicalReplicate)), by=c("UniqueID" = "RNAID"))
#keep only miRNA coding genes with more than 0 counts
miRNAs_long <- miRNAs_long %>% filter(est_counts > 0)
number_miR_detected <- miRNAs_long %>% group_by(SampleID) %>% dplyr::summarize(miR_above0=n())
number_miR_detected <- full_join(number_miR_detected,
miRNAs_long %>% group_by(SampleID) %>%
dplyr::summarize(total_est_counts_above0=sum(est_counts)),
by="SampleID")
#cutoff_kit <- single_pos %>% group_by(GraphTube) %>% dplyr::summarise(median_th = median(filter_threshold))
miRNAs_cutoff <- miRNAs %>% gather(., key="UniqueID", value="est_counts", -MIMATID) %>% #long format
left_join(., dplyr::select(sample_annotation,c(UniqueID,RNAID,RNAisolation,GraphTube,GraphKit, SampleID, Tube, BiologicalReplicate, TimeInterval)), by=c("UniqueID" = "RNAID"))
#left_join(., cutoff_kit, by="GraphTube") #add the median cut-off for each kit
miRNAs_cutoff <- miRNAs_cutoff %>%
filter(est_counts > 6)
number_miR_detected <- full_join(number_miR_detected,
miRNAs_cutoff %>% group_by(SampleID) %>%
dplyr::summarize(miR_aboveTh=n()),
by="SampleID")
number_miR_detected <- full_join(number_miR_detected,
miRNAs_cutoff %>% group_by(SampleID) %>%
dplyr::summarize(total_est_counts_aboveTh=sum(est_counts)),
by="SampleID")
number_miR_detected <- left_join(number_miR_detected,
dplyr::select(sample_annotation, c(UniqueID,RNAID, GraphTube, GraphKit, RNAisolation, EluateV,SampleID, Tube, BiologicalReplicate, TimeInterval, donorID)),
by="SampleID")
#convert to the original format
miRNAs_cutoff <- dplyr::select(miRNAs_cutoff, MIMATID, UniqueID, est_counts, Tube, BiologicalReplicate, TimeInterval) %>%
spread(., key=UniqueID, value=est_counts)
#write.csv( miRNAs_cutoff, file="../../../exRNAQC/ miRNAs_cutoff.csv",row.names=FALSE, na="")Linear mixed-effects model with crossed fixed effects of tube, kit and timelapse.
count_m1<-lme(miR_aboveTh~Tube*RNAisolation*TimeInterval,
random=~1|donorID,
data=number_miR_detected)
anova(count_m1)anova_count_m1 <- round(anova(count_m1)[c(5, 6, 7), c(4)], digits = 3)Second order interactions
count_m2<-lme(miR_aboveTh~(Tube+RNAisolation+TimeInterval)^2,
random=~1|donorID,
data=number_miR_detected)
anova(count_m2)Only significant interactions
count_m3<-lme(miR_aboveTh~Tube+RNAisolation+TimeInterval+Tube:RNAisolation+Tube:TimeInterval,
random=~1|donorID,
data=number_miR_detected)
anova(count_m3)Checking normality of residuals
qqnorm(count_m3$residuals)
qqline(count_m3$residuals)For all tubes, the effect of time is investigated. The data is collapsed over RNA isolation method.
count_m3.time<-emmeans(count_m3, ~TimeInterval|Tube)
plot(count_m3.time, comparisons=TRUE)pairs(count_m3.time)## Tube = Citrate:
## contrast estimate SE df t.ratio p.value
## T0 - T04 15.2 5.24 74 2.901 0.0134
## T0 - T16 26.2 5.24 74 5.000 <.0001
## T04 - T16 11.0 5.24 74 2.099 0.0969
##
## Tube = EDTA:
## contrast estimate SE df t.ratio p.value
## T0 - T04 -0.5 5.24 74 -0.095 0.9950
## T0 - T16 -12.5 5.24 74 -2.385 0.0508
## T04 - T16 -12.0 5.24 74 -2.290 0.0635
##
## Tube = serum:
## contrast estimate SE df t.ratio p.value
## T0 - T04 1.6 5.24 74 0.305 0.9499
## T0 - T16 5.6 5.24 74 1.069 0.5364
## T04 - T16 4.0 5.24 74 0.763 0.7265
##
## Results are averaged over the levels of: RNAisolation
## Degrees-of-freedom method: containment
## P value adjustment: tukey method for comparing a family of 3 estimates
Pair-wise comparison T test
#T test
# Pairwise comparisons
count_pwc <- number_miR_detected %>% group_by(TimeInterval) %>% pairwise_t_test(miR_aboveTh ~ Tube) %>% add_xy_position(x = "Tube")
# Show p-values
count_plot<-ggplot(number_miR_detected, aes(x = Tube, y = miR_aboveTh, color=Tube)) +
geom_boxplot() +
geom_point() +
facet_wrap(~ as.factor(TimeInterval)) +
stat_pvalue_manual(count_pwc, label = "p.adj") +
scale_color_manual(values=color_panel)+
scale_fill_manual(values=color_panel)+
theme_point + theme(axis.text.x = element_text(angle=90, vjust=0.5, hjust=1), legend.position = 'none')+
labs(y = "sensitivity")
count_plotat T16 Citrate has significant lower sensitivity than the other tubes
pairs(count_m3.time)## Tube = Citrate:
## contrast estimate SE df t.ratio p.value
## T0 - T04 15.2 5.24 74 2.901 0.0134
## T0 - T16 26.2 5.24 74 5.000 <.0001
## T04 - T16 11.0 5.24 74 2.099 0.0969
##
## Tube = EDTA:
## contrast estimate SE df t.ratio p.value
## T0 - T04 -0.5 5.24 74 -0.095 0.9950
## T0 - T16 -12.5 5.24 74 -2.385 0.0508
## T04 - T16 -12.0 5.24 74 -2.290 0.0635
##
## Tube = serum:
## contrast estimate SE df t.ratio p.value
## T0 - T04 1.6 5.24 74 0.305 0.9499
## T0 - T16 5.6 5.24 74 1.069 0.5364
## T04 - T16 4.0 5.24 74 0.763 0.7265
##
## Results are averaged over the levels of: RNAisolation
## Degrees-of-freedom method: containment
## P value adjustment: tukey method for comparing a family of 3 estimates
For both isolation kits the effect of tubes is investigated
count_m3.method<-emmeans(count_m3, ~Tube|RNAisolation)
plot(count_m3.method, comparisons=TRUE)pairs(count_m3.method)## RNAisolation = Maxwell:
## contrast estimate SE df t.ratio p.value
## Citrate - EDTA 24.2 4.28 74 5.656 <.0001
## Citrate - serum -3.8 4.28 74 -0.888 0.6495
## EDTA - serum -28.0 4.28 74 -6.544 <.0001
##
## RNAisolation = miRNeasySPAkit:
## contrast estimate SE df t.ratio p.value
## Citrate - EDTA -24.5 4.28 74 -5.718 <.0001
## Citrate - serum -39.8 4.28 74 -9.302 <.0001
## EDTA - serum -15.3 4.28 74 -3.584 0.0017
##
## Results are averaged over the levels of: TimeInterval
## Degrees-of-freedom method: containment
## P value adjustment: tukey method for comparing a family of 3 estimates
Pair-wise comparison T test
# Pairwise comparisons
count_pwc <- number_miR_detected %>% group_by(GraphKit) %>% pairwise_t_test(miR_aboveTh ~ Tube) %>% add_xy_position(x = "Tube")
# Show p-values
count_plot<-ggplot(number_miR_detected, aes(x = Tube, y = miR_aboveTh, color=Tube)) +
geom_boxplot() +
geom_point() +
facet_wrap(~ as.factor(GraphKit)) +
stat_pvalue_manual(count_pwc, label = "p.adj", y.position = c(215, 225, 235)) +
scale_color_manual(values=color_panel)+
scale_fill_manual(values=color_panel)+
theme_point + theme(axis.text.x = element_text(angle=90, vjust=0.5, hjust=1), legend.position = 'none')+
labs(y = "sensitivity")
count_plot#ggsave("./smallSensitivity.pdf", plot = count_plot, dpi = 300)The ALC values are summarized based on the average of the ALC values (after removing NAs). The dot plot at the end gives an overview of all these values.
Score: lower ALC = better
Linear mixed-effects model with crossed fixed effects of tube, kit and timelapse.
ALC_m1<-lme(ALC_calc~Tube*RNAisolation*TimePoint,
random=~1|BiologicalReplicate,
data=ALC)
anova(ALC_m1)anova_ALC_m1 <- round(anova(ALC_m1)[c(5, 6, 7), c(4)], digits = 3)There is a significant interaction between Tube:RNAIsolatiom
Now, second-order interactions
ALC_m2<-lme(ALC_calc~(Tube+RNAisolation+TimePoint)^2,
random=~1|BiologicalReplicate,
data=ALC)
anova(ALC_m2)There interaction between Tube:RNAIsolatiom remains significant.
Now, the model keeping only the significant interaction
ALC_m3<-lme(ALC_calc~Tube+RNAisolation+TimePoint+Tube:RNAisolation,
random=~1|BiologicalReplicate,
data=ALC)
anova(ALC_m3)Checking normality of residuals
qqnorm(ALC_m3$residuals)
qqline(ALC_m3$residuals)For both RNA isolation methods, the effect of tube is investigated. The data is collapsed over time.
ALC_m3.methods<-emmeans(ALC_m3, ~Tube|RNAisolation)
plot(ALC_m3.methods, comparisons=TRUE)pairs(ALC_m3.methods)## RNAisolation = Maxwell:
## contrast estimate SE df t.ratio p.value
## Citrate - EDTA 0.00600 0.0109 49 0.548 0.8480
## Citrate - serum 0.02292 0.0109 49 2.093 0.1017
## EDTA - serum 0.01692 0.0109 49 1.545 0.2790
##
## RNAisolation = miRNeasySPAkit:
## contrast estimate SE df t.ratio p.value
## Citrate - EDTA 0.04688 0.0109 49 4.283 0.0002
## Citrate - serum 0.04910 0.0109 49 4.485 0.0001
## EDTA - serum 0.00222 0.0109 49 0.202 0.9777
##
## Results are averaged over the levels of: TimePoint
## Degrees-of-freedom method: containment
## P value adjustment: tukey method for comparing a family of 3 estimates
Pair-wise T test
# Pairwise comparisons
ALC_pwc <- ALC %>% group_by(RNAisolation) %>% pairwise_t_test(ALC_calc ~ Tube) %>% add_xy_position(x = "Tube")
# Show p-values
alc_plot<-ggplot(ALC, aes(x = Tube, y = ALC_calc, color = Tube)) +
geom_boxplot()+
geom_point()+
scale_colour_manual(values=color_panel)+
facet_wrap(~ RNAisolation) +
stat_pvalue_manual(ALC_pwc, label = "p.signif") +
theme(panel.grid.major.x=element_line(linetype = "dashed",color="gray88"), axis.text.x=element_text(size=8))+
labs(title = "Tube comparison within kits (T test)", y = "Area left of the curve")
alc_plotWe are usually interested in miRNAs, and the other biotypes are often seen as contaminants. The fraction was calculated as the number of miRNA reads / total reads (mapped + spikes)
perc_miRs <- miRNAs %>% gather(key="UniqueID",value="counts", -"MIMATID") %>% mutate(type=gsub("[0-9].*$","",MIMATID)) %>%
dplyr::group_by(UniqueID,type) %>%
dplyr::summarise(sum_counts = sum(counts, na.rm=T)) %>% #calculate sum of MIMAT per sample
full_join(reads_complete %>% filter(reads=="all_mapped")) %>%
mutate(perc=sum_counts/counts*100) %>% #calculate perc of miRs and spikes of total mapped reads
left_join(sample_annotation, by=c("UniqueID" = "RNAID"))Linear mixed-effects model with crossed fixed effects of tube, kit and timelapse.
Biotype_m1<-lme(perc~Tube*RNAisolation*TimeInterval,
random=~1|BiologicalReplicate,
data=perc_miRs)
anova(Biotype_m1)anova_Biotype_m1 <- round(anova(Biotype_m1)[c(5, 6, 7), c(4)], digits = 3)Second order interactions:
Biotype_m2<-lme(perc~(Tube+RNAisolation+TimeInterval)^2,
random=~1|BiologicalReplicate,
data=perc_miRs)
anova(Biotype_m2)Keeping only the significant interactions:
Biotype_m3<-lme(perc~Tube+RNAisolation+TimeInterval+Tube:RNAisolation+Tube:TimeInterval,
random=~1|BiologicalReplicate,
data=perc_miRs)
anova(Biotype_m3)For both isolation kits the effect of tubes was investigated.
Biotype_m3.methods<-emmeans(Biotype_m3, ~Tube|RNAisolation)
plot(Biotype_m3.methods, comparisons=TRUE)pairs(Biotype_m3.methods)## RNAisolation = Maxwell:
## contrast estimate SE df t.ratio p.value
## Citrate - EDTA -6.09 2.46 78 -2.474 0.0407
## Citrate - serum 14.81 2.46 78 6.021 <.0001
## EDTA - serum 20.90 2.46 78 8.495 <.0001
##
## RNAisolation = miRNeasySPAkit:
## contrast estimate SE df t.ratio p.value
## Citrate - EDTA -38.44 2.46 78 -15.628 <.0001
## Citrate - serum -15.22 2.46 78 -6.189 <.0001
## EDTA - serum 23.22 2.46 78 9.439 <.0001
##
## Results are averaged over the levels of: TimeInterval
## Degrees-of-freedom method: containment
## P value adjustment: tukey method for comparing a family of 3 estimates
Pair-wise T test
# Pairwise comparisons
Biotype_pwc <- perc_miRs %>% group_by(RNAisolation) %>% pairwise_t_test(perc ~ Tube) %>% add_xy_position(x = "Tube")
# Show p-values
Biotype_plot<-ggplot(perc_miRs, aes(x = Tube, y = perc, color = Tube)) +
geom_boxplot()+
geom_point()+
scale_colour_manual(values=color_panel)+
facet_wrap(~ RNAisolation) +
stat_pvalue_manual(Biotype_pwc, label = "p.signif") +
theme(panel.grid.major.x=element_line(linetype = "dashed",color="gray88"), axis.text.x=element_text(size=8))+
labs(title = "Tube comparison within kits (T test)", y = "miRNA counts (%)")
Biotype_plotFor all tubes the effect of time interval was investigated
Biotype_m3.time<-emmeans(Biotype_m3, ~TimeInterval|Tube)
plot(Biotype_m3.time, comparisons=TRUE)pairs(Biotype_m3.time)## Tube = Citrate:
## contrast estimate SE df t.ratio p.value
## T0 - T04 0.590 3.01 78 0.196 0.9791
## T0 - T16 13.105 3.01 78 4.350 0.0001
## T04 - T16 12.515 3.01 78 4.154 0.0002
##
## Tube = EDTA:
## contrast estimate SE df t.ratio p.value
## T0 - T04 1.672 3.01 78 0.555 0.8442
## T0 - T16 2.018 3.01 78 0.670 0.7816
## T04 - T16 0.346 3.01 78 0.115 0.9928
##
## Tube = serum:
## contrast estimate SE df t.ratio p.value
## T0 - T04 4.579 3.01 78 1.520 0.2873
## T0 - T16 7.874 3.01 78 2.614 0.0286
## T04 - T16 3.295 3.01 78 1.094 0.5208
##
## Results are averaged over the levels of: RNAisolation
## Degrees-of-freedom method: containment
## P value adjustment: tukey method for comparing a family of 3 estimates
Highly significant interactions were found between Tube:RNAisolation and Tube:TimeInterval for metrics: extraction efficiency, sensitivity, and biotype. No significant interaction between RNAisolation:Timeinterval.
overview <- data.frame(row.names = c("Tube:RNAisolation","Tube:TimeInterval","RNAisolation:TimeInterval"), "RNA concentration" = anova_EndovsRC_m1, "extraction efficiency" = anova_EndovsLP_InputCorr_m1, "sensitivity" = anova_count_m1, "reproducibility" = anova_ALC_m1, "biotype" = anova_Biotype_m1, check.names = FALSE)
overview_t <- t(overview)
#breaks for heatmap
bk1 <- c(seq(0,0.051,by=0.005))
#make color palette
custom_palette = c(colorRampPalette(brewer.pal(n = 7, name =
"RdBu"))(20))
custom_palette2 = c(custom_palette[1:10], 'white')
#make matrix with labels for heatmap
labels_tab <- overview_t
labels_tab[labels_tab<0.01]<-"<0.01"
#make heatmap
phm<-pheatmap::pheatmap(overview_t,
breaks = bk1,
display_numbers = labels_tab,fontsize_number=11, number_color="black",
color = custom_palette2,
angle_col = 45,
cluster_rows=FALSE, cluster_cols=FALSE)ggsave("./smallInteractions2.pdf", plot = phm, dpi = 300)## Saving 7 x 5 in image